Load in necessary packages
library(tidyverse)
## ── Attaching packages ────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.0 ✓ purrr 0.3.4
## ✓ tibble 3.0.1 ✓ dplyr 0.8.5
## ✓ tidyr 1.0.2 ✓ stringr 1.4.0
## ✓ readr 1.3.1 ✓ forcats 0.5.0
## ── Conflicts ───────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(sf)
## Linking to GEOS 3.7.2, GDAL 2.4.2, PROJ 5.2.0
library(mapview)
library(tigris)
## To enable
## caching of data, set `options(tigris_use_cache = TRUE)` in your R script or .Rprofile.
##
## Attaching package: 'tigris'
## The following object is masked from 'package:graphics':
##
## plot
library(censusapi)
##
## Attaching package: 'censusapi'
## The following object is masked from 'package:methods':
##
## getFunction
library(leaflet)
library(lehdr)
library(fs)
options(
tigris_class = "sf",
tigris_use_cache = TRUE
)
Sys.setenv(CENSUS_KEY="10dcd73d7c043e91bac9fb8d3989cbff54b08790")
The data used to assess the % of workers deemed essential was determined using the Labor Market Information (LMI) Institute’s Standard Occupation Classification (SOC) codes connectd to essential businesses. (https://www.lmiontheweb.org/more-than-half-of-u-s-workers-in-critical-occupations-in-the-fight-against-covid-19/). The SOC codes the LMI institute provided are based on the Cybersecurity and Infrastructure Security Agency’s list from April 17, 2020 (https://www.cisa.gov/sites/default/files/publications/Version_3.0_CISA_Guidance_on_Essential_Critical_Infrastructure_Workers_4.pdf). These codes have the advantage of being directly linked to census respondees so that each respondee has one SOC code.
Load Bay Area PUMA-level data. You can see how PUMA (Public Use Microdata Area) level regions compare to tract/blockgroup level regions here (https://tigerweb.geo.census.gov/tigerweb/) Visualization of the PUMAs
# get Bay Area County PUMAS
bay_area_county_names <-
c(
"Alameda",
"Contra Costa",
"Marin",
"Napa",
"San Francisco",
"San Mateo",
"Santa Clara",
"Solano",
"Sonoma"
)
bay_pumas <- pumas("CA", cb=F, progress_bar=F) %>%
mutate(county = substr(GEOID10, start = 1, stop =5)) %>%
filter(county %in% c("06085","06081","06075","06041","06097","06055","06095","06013","06001"))
bay_pumas_codes <-
bay_pumas %>% pull(PUMACE10)
projection <- "+proj=utm +zone=10 +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +units=ft +no_defs"
bay_area_county_names <-
c("Alameda","Contra Costa","Marin","Napa","San Francisco","San Mateo","Santa Clara","Solano","Sonoma")
#Water to clean up boundaries of PUMAs
water <-
bay_area_county_names %>%
map_dfr(function(county){ # this is the tidyverse version of a for loop. You can practice writing this the normal for loop way.
area_water("CA", county, progress_bar = FALSE) %>% as.data.frame()
}) %>%
st_as_sf() %>%
st_set_crs(st_crs(counties("CA", cb=F, progress_bar = FALSE)
)) %>%
st_union() %>%
as_Spatial() %>%
sp::disaggregate() %>%
st_as_sf() %>%
st_set_crs(st_crs(counties("CA", cb=F, progress_bar = FALSE)
)) %>%
mutate(area = st_area(.) %>% as.numeric()) %>%
filter(area == max(area)) %>%
dplyr::select(-area)
## Warning in bind_rows_(x, .id): Vectorizing 'sfc_POLYGON' elements may not
## preserve their attributes
## Warning in bind_rows_(x, .id): Vectorizing 'sfc_POLYGON' elements may not
## preserve their attributes
## Warning in bind_rows_(x, .id): Vectorizing 'sfc_POLYGON' elements may not
## preserve their attributes
## Warning in bind_rows_(x, .id): Vectorizing 'sfc_POLYGON' elements may not
## preserve their attributes
## Warning in bind_rows_(x, .id): Vectorizing 'sfc_POLYGON' elements may not
## preserve their attributes
## Warning in bind_rows_(x, .id): Vectorizing 'sfc_POLYGON' elements may not
## preserve their attributes
## Warning in bind_rows_(x, .id): Vectorizing 'sfc_POLYGON' elements may not
## preserve their attributes
## Warning in bind_rows_(x, .id): Vectorizing 'sfc_POLYGON' elements may not
## preserve their attributes
## Warning in bind_rows_(x, .id): Vectorizing 'sfc_POLYGON' elements may not
## preserve their attributes
bay_pumas_fixed <-
bay_pumas %>%
st_difference(water)
## although coordinates are longitude/latitude, st_difference assumes that they are planar
## Warning: attribute variables are assumed to be spatially constant throughout all
## geometries
mapview(bay_pumas_fixed)
print(str_c("There are ",dim(bay_pumas_fixed)[[1]], " PUMAs in the Bay Area")) #Nrows
## [1] "There are 55 PUMAs in the Bay Area"
data_dir <- "/Users/spencerrobinson/Documents/covid19/Spencer_Robinson/"
raw_soc <- readxl::read_xlsx(str_c(data_dir,"SOC-Codes-CISA-Critical-Infrastructure-Workers-with-OES-Data.xlsx"))
soc <- raw_soc %>%
mutate(Critical = replace_na(Critical, FALSE)) %>%
mutate(Critical = replace(Critical, Critical == "X",TRUE)) %>%
mutate(`SOC Occupation` = str_remove(`SOC Occupation`,"-")) %>% #Places SOC code in the format used in PUMS data
mutate(Critical = as.logical(Critical)) %>%
select(`SOC Occupation`,`Occupation Description`,Critical)
# #Visualize Breakdown
# soc %>%
# group_by(Critical) %>%
# summarize(SOCs = n()) %>%
# mutate(Critical = replace(Critical, Critical == FALSE ,"Nonessential")) %>%
# mutate(Critical = replace(Critical, Critical == TRUE ,"Essential")) %>%
# ggplot(aes(Critical,SOCs)) + geom_col()
# puma_soc <- read_csv("/Users/spencerrobinson/pCloud Drive/SFBI/Data Library/PUMS/csv_pca/psam_p06.csv")
# colnames(puma_soc)
# bay_puma_soc <- puma_soc %>%
# filter(PUMA %in% bay_pumas_codes) #Filters for just 9 Bay Area Counties
# saveRDS(bay_puma_soc, file = "/Users/spencerrobinson/Documents/covid19/Spencer_Robinson/bay_puma_soc.rds")
bay_puma <- readRDS(file = "/Users/spencerrobinson/Documents/covid19/Spencer_Robinson/bay_puma_soc.rds") #PUMA = Public Use Microdata Area
#Join SOC Essential Designations to PUMA Survey Data
soc_essential <- bay_puma %>%
select("PUMA","SOCP") %>%
left_join(soc, by = c("SOCP" = "SOC Occupation")) %>%
mutate(`Occupation Description` = ifelse(is.na(SOCP), replace_na(`Occupation Description`, "Not In Labor Force"),`Occupation Description`)) %>% #Designate those without SOCP as not in the labor force
mutate(`Occupation Description` = replace_na(`Occupation Description`, "Not Given")) #Label non-6 digit SOCP codes as not having description
(str_c("There are ",count(soc_essential) ," PUMA respondents in the Bay Area"))
## [1] "There are 376057 PUMA respondents in the Bay Area"
#Normalize survey for population using PWGTP weights
bay_puma_weighted <- data.frame(PUMA = rep(bay_puma$PUMA, times = bay_puma$PWGTP), SOCP = rep(bay_puma$SOCP, times = bay_puma$PWGTP))
soc_essential_weighted <- bay_puma_weighted %>%
left_join(soc, by = c("SOCP" = "SOC Occupation")) %>%
mutate(`Occupation Description` = ifelse(is.na(SOCP), replace_na(`Occupation Description`, "Not In Labor Force"),`Occupation Description`)) %>% #Designate those without SOCP as not in the labor force
mutate(`Occupation Description` = replace_na(`Occupation Description`, "Not Given")) #Label non-6 digit SOCP codes as not having description
## Warning: Column `SOCP`/`SOC Occupation` joining factor and character vector,
## coercing into character vector
(str_c("POST WEIGHTING. There are ",count(soc_essential_weighted) ," PUMA respondents in the Bay Area", "\n",
"The adjustment increases the population by a factor of ", round(count(soc_essential_weighted)/count(soc_essential),1)
)
)
## [1] "POST WEIGHTING. There are 7675682 PUMA respondents in the Bay Area\nThe adjustment increases the population by a factor of 20.4"
#Visualize In Labor Force v. Not In Labor Force
soc_essential %>%
mutate("In Labor Force" = ifelse(`Occupation Description`!="Not In Labor Force",TRUE,FALSE)) %>%
group_by(`In Labor Force`) %>%
tally() %>%
ggplot(aes(x = reorder(`In Labor Force`,-n), y = n, fill = `In Labor Force`)) +
geom_col() +
labs(title = "Bay Area Population In Labor Force", subtitle = "In Labor Force means: \nA) less than 16 years old OR \nB) person who last worked more than 5 years ago or never worked", x = "", y = "Population")
#POST WEIGHTING Visualize In Labor Force v. Not In Labor Force
soc_essential_weighted %>%
mutate("In Labor Force" = ifelse(`Occupation Description`!="Not In Labor Force",TRUE,FALSE)) %>%
group_by(`In Labor Force`) %>%
tally() %>%
ggplot(aes(x = reorder(`In Labor Force`,-n), y = n, fill = `In Labor Force`)) +
geom_col() +
labs(title = "ADJUSTED Bay Area Population In Labor Force", subtitle = "In Labor Force means: \nA) less than 16 years old OR \nB) person who last worked more than 5 years ago or never worked", x = "", y = "Population")
#POST WEIGHTING: Visualize Top 10 Most Popular Occupations
soc_essential_weighted %>%
filter(!is.na(SOCP)) %>% #Filter out those not in work force
group_by(SOCP, `Occupation Description`, Critical) %>%
tally() %>%
arrange(desc(n)) %>%
head(n = 10) %>%
ungroup() %>%
mutate(`Occupation Description` = replace(`Occupation Description`, SOCP == "1191XX", "Other Managers"),
`Occupation Description` = replace(`Occupation Description`, SOCP == "252020", "Elementary and Middle School Teachers"),
`Occupation Description` = replace(`Occupation Description`, SOCP == "412010", "Cashiers")
) %>%
ggplot(aes(reorder(SOCP,n),n, fill = `Critical`))+
geom_col()+
theme(axis.text.x = element_text(angle = 90))+
coord_flip()+
labs(title = "ADJUSTED Top 10 Occupations In Bay Area", x = "Occupation", y = "Population")+geom_label(aes(label = `Occupation Description`))
#Visualize percent is still NA
essential_visualize <- soc_essential_weighted %>%
filter(!is.na(SOCP)) %>% #Filter out those not in work force
group_by(Critical) %>%
tally() %>%
mutate(percent = n/sum(n)*100) %>%
ggplot(aes(x = "", y = percent , fill = Critical), stat = "identity", position = "fill")+geom_col()+geom_text(aes(label = round(percent,2)), stat = "identity", position = position_stack(vjust = 0.5))+
labs(title = "Essential Worker Status in Bay Area", x = NULL)
essential_visualize
#Using fraction Essential instead of TRUE/FALSE
soc_frac <- soc %>%
mutate(Critical = as.numeric(Critical)) %>%
rename("fracEssential" = Critical)
get_fracEssential <- function(df){
df %>%
group_by(`SOC Occupation` , fracEssential) %>%
summarize(count = n()) %>%
spread(fracEssential,count) %>%
mutate(`1` = replace_na(`1`,0)) %>%
mutate(`0` = replace_na(`0`, 0)) %>%
rename("Nonessential" = `0`, "Essential" = `1` ) %>%
mutate(fracEssential = Essential/(Essential+Nonessential)) %>%
select(`SOC Occupation` ,fracEssential)
}
soc_3XXX_dig <- soc_frac %>%
mutate(`SOC Occupation` = str_c(substr(`SOC Occupation`,1,3),"XXX")) %>%
get_fracEssential()
soc_4XX_dig <- soc_frac %>%
mutate(`SOC Occupation` = str_c(substr(`SOC Occupation`, 1,4),"XX")) %>%
get_fracEssential()
soc_5X_dig <- soc_frac %>%
mutate(`SOC Occupation` = str_c(substr(`SOC Occupation`, 1,5),"X")) %>%
get_fracEssential()
soc_3YYY_dig <- soc_frac %>%
mutate(`SOC Occupation` = str_c(substr(`SOC Occupation`,1,3),"YYY")) %>%
get_fracEssential()
soc_4YY_dig <- soc_frac %>%
mutate(`SOC Occupation` = str_c(substr(`SOC Occupation`, 1,4),"YY")) %>%
get_fracEssential()
soc_5Y_dig <- soc_frac %>%
mutate(`SOC Occupation` = str_c(substr(`SOC Occupation`, 1,5),"Y")) %>%
get_fracEssential()
soc_5_0_dig <- soc_frac %>%
mutate(`SOC Occupation` = str_c(substr(`SOC Occupation`, 1,5),"0")) %>%
get_fracEssential()
soc_3_000_dig <- soc_frac %>%
mutate(`SOC Occupation` = str_c(substr(`SOC Occupation`, 1,3),"000")) %>%
get_fracEssential()
soc_frac_adj <- soc_frac %>%
full_join(soc_3XXX_dig) %>%
full_join(soc_4XX_dig) %>%
full_join(soc_5X_dig) %>%
full_join(soc_3YYY_dig) %>%
full_join(soc_4YY_dig) %>%
full_join(soc_5Y_dig) %>%
full_join(soc_5_0_dig) %>%
full_join(soc_3_000_dig)
## Joining, by = c("SOC Occupation", "fracEssential")
## Joining, by = c("SOC Occupation", "fracEssential")
## Joining, by = c("SOC Occupation", "fracEssential")
## Joining, by = c("SOC Occupation", "fracEssential")
## Joining, by = c("SOC Occupation", "fracEssential")
## Joining, by = c("SOC Occupation", "fracEssential")
## Joining, by = c("SOC Occupation", "fracEssential")
## Joining, by = c("SOC Occupation", "fracEssential")
soc_frac_adj <- soc_frac_adj %>%
add_row(`SOC Occupation` = "999920",`Occupation Description` = "Unemployed",fracEssential = 0) %>%
add_row(`SOC Occupation` = "559830",`Occupation Description` = "Military",fracEssential = 1)
soc_essential_weighted_frac <- bay_puma_weighted %>%
select("PUMA","SOCP") %>%
left_join(soc_frac_adj, by = c("SOCP" = "SOC Occupation")) %>%
mutate(`Occupation Description` = ifelse(is.na(SOCP), replace_na(`Occupation Description`, "Not In Labor Force"),`Occupation Description`)) #Remove those not in the labor force
## Warning: Column `SOCP`/`SOC Occupation` joining factor and character vector,
## coercing into character vector
#Visualize percent breakdown of fraction essential
soc_essential_weighted_frac %>%
filter(!is.na(SOCP)) %>% #Filter out those not in work force
group_by(fracEssential) %>%
tally() %>%
mutate(percent = n/sum(n)*100) %>%
ggplot(aes(x = "", y = percent , fill = fracEssential), stat = "identity", position = "fill")+geom_col()+
scale_fill_gradient(low="white", high="blue")+
geom_text(aes(label = round(percent,1)), stat = "identity", position = position_stack(vjust = 0.5))+
labs(title = "Essential Worker Status in Bay Area", x = NULL)
#Visualize percent breakdown of fraction essential including Not in Work Force
soc_essential_weighted_frac %>%
mutate(fracEssential = ifelse(is.na(SOCP),0,fracEssential)) %>% #Not in Workforce is included as non-essential
group_by(fracEssential) %>%
tally() %>%
mutate(percent = n/sum(n)*100) %>%
ggplot(aes(x = "", y = percent , fill = fracEssential), stat = "identity", position = "fill")+geom_col()+
scale_fill_gradient(low="white", high="blue")+
geom_text(aes(label = round(percent,1)), stat = "identity", position = position_stack(vjust = 0.5))+
labs(title = "Essential Worker Status in Bay Area including Not in Workforce", x = NULL)
#Average fracEssential by PUMA
soc_essential_puma <- soc_essential_weighted_frac %>%
filter(!is.na(SOCP)) %>% #Remove those not in work force
group_by(PUMA) %>%
summarize(fracEssential = round(mean(fracEssential)*100))
#Filter out those not in work force
soc_essential_puma_nilf <- soc_essential_weighted_frac %>%
mutate(fracEssential = ifelse(is.na(SOCP),0,fracEssential)) %>% #Not in Workforce is included as non-essential
group_by(PUMA) %>%
summarize(fracEssential = round(mean(fracEssential)*100))
#Filter out those not in work force
blue_pal <- colorNumeric(
palette = "Blues",
domain =
c(soc_essential_puma %>%
pull(fracEssential) %>%
unique())
)
bay_essential_soc_map <- leaflet(data = soc_essential_puma) %>%
addProviderTiles(providers$CartoDB.Positron) %>%
addPolygons(
data =
soc_essential_puma %>%
left_join(bay_pumas_fixed, by = c("PUMA" = "PUMACE10")) %>%
st_as_sf() %>% st_set_crs(4326),
fillColor = ~blue_pal(fracEssential),
color = "white",
weight = 0.25,
fillOpacity = 0.5,
label = ~paste0(fracEssential,"%"),
highlightOptions = highlightOptions(
weight = 2,
opacity = 1
)
) %>%
addLegend(pal = blue_pal, values = ~fracEssential, opacity = 1, title = "% Essential Workers")
## Warning: Column `PUMA`/`PUMACE10` joining factor and character vector, coercing
## into character vector
## Warning: st_crs<- : replacing crs does not reproject data; use st_transform for
## that
bay_essential_soc_map
red_pal <- colorNumeric(
palette = "Reds",
domain =
c(soc_essential_puma_nilf %>%
pull(fracEssential) %>%
unique())
)
bay_essential_soc_map_nilf <- leaflet(data = soc_essential_puma_nilf) %>%
addProviderTiles(providers$CartoDB.Positron) %>%
addPolygons(
data =
soc_essential_puma_nilf %>%
left_join(bay_pumas_fixed, by = c("PUMA" = "PUMACE10")) %>%
st_as_sf() %>% st_set_crs(4326),
fillColor = ~red_pal(fracEssential),
color = "white",
weight = 0.25,
fillOpacity = 0.5,
label = ~paste0(fracEssential,"%"),
highlightOptions = highlightOptions(
weight = 2,
opacity = 1
)
) %>%
addLegend(pal = red_pal, values = ~fracEssential, opacity = 1, title = "% Essential Workers with Not In Labor Force")
## Warning: Column `PUMA`/`PUMACE10` joining factor and character vector, coercing
## into character vector
## Warning: st_crs<- : replacing crs does not reproject data; use st_transform for
## that
bay_essential_soc_map_nilf
bay_blockgroups <-
block_groups("CA", bay_area_county_names, cb=F, progress_bar=F)
bay_blockgroup_assigned_pumas <-
bay_blockgroups %>%
st_centroid() %>%
st_join(bay_pumas %>% select(PUMACE10), left = F) %>%
st_set_geometry(NULL) %>%
left_join(bay_pumas %>% dplyr::select(PUMACE10, geometry), by = "PUMACE10") %>%
st_as_sf() %>%
st_difference(water)
## Warning in st_centroid.sf(.): st_centroid assumes attributes are constant over
## geometries of x
## Warning in st_centroid.sfc(st_geometry(x), of_largest_polygon =
## of_largest_polygon): st_centroid does not give correct centroids for longitude/
## latitude data
## although coordinates are longitude/latitude, st_intersects assumes that they are planar
## although coordinates are longitude/latitude, st_intersects assumes that they are planar
## although coordinates are longitude/latitude, st_difference assumes that they are planar
## Warning: attribute variables are assumed to be spatially constant throughout all
## geometries